###################################################
# observational analysis
# Author: Guillermo Rosas & Jonas Markgraf
# Date: 2023-03-05
###################################################

rm(list = ls())

set.seed(123)

# load libraries
packages <- c("car", "foreign",
              "readxl", "readstata13",
              "tidyverse", "repmis",
              "runjags", "Amelia",
              "gtools", "mice",
              "mitools", "merTools",
              "parallel", "tidyverse",
              "plm", "stargazer",
              "lme4", "arm", "optimx",
              "MASS", "data.table", "mvtnorm")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

possibles <- c("~/Desktop/replication_files/") # set your work directory

set_valid_wd (possibles)

# load dataset
ess <- read.dta13("finalESS.dta",
                  convert.factors = F)

dataKey <- data.frame(varLabels = attr(ess, "var.labels"))

### recode variables included in analysis
ess$brwmny <- ifelse(ess$brwmny > 5, NA, ess$brwmny)

ess$rlgdgr <- ifelse(ess$rlgdgr > 11, NA, ess$rlgdgr)

ess$agea <- ifelse(ess$agea > 120, NA, ess$agea)

### generate key variables
ess$cntry.yr <- paste0(ess$cou, ess$year)

### generate categorical income for request

ess_dt <- data.table(ess)
ess_dt[, income_cat:=ntile(income, 4), by=list(cntry.yr)]
ess <- as.data.frame(ess_dt)

# add household debt level
hhdebt_dta <- read_excel("Data_Extract_From_World_Development_Indicators.xlsx")

### wide to long
hhdebt_dta %>%
  dplyr::select(-c(`Series Code`)) %>%
  rename('var_name' = `Series Name`
         , 'cntry_name' = `Country Name`
         , 'cntry_code' = `Country Code`) %>%
  pivot_longer(!c(var_name, cntry_code, cntry_name), names_to = 'year'
               , values_to = 'hhdebt2gdp') %>%
  dplyr::select(-c('var_name', 'cntry_name')) %>%
  filter(hhdebt2gdp != '..') %>%
  mutate(year = as.numeric(substr(year, 1, 4))
         , hhdebt2gdp = as.numeric(hhdebt2gdp)) -> hhdebt_dta

ess <- left_join(ess, hhdebt_dta
            , by = c("cou" = "cntry_code"
                     , "year" = "year"))

#########################################################
# Build smaller dataset, only with variables we use
neededVars <- c("gincdif2", "brwmny", "income", "wrongincome"
                ,"our", "male", "agea", "income_cat"
                ,"unemplindiv", "eduyrs2", "hhdebt2gdp","cou" ,"year"
                ,"cntry.yr")

complete.ess <- ess[,is.element(colnames(ess),neededVars)]
complete.ess <- complete.ess[!is.na(complete.ess$income),] # Eliminate surveys that did not collect income
complete.ess <- complete.ess[!is.na(complete.ess$wrongincome) & complete.ess$wrongincome != 1, ]
complete.ess$year <- as.character(complete.ess$year)
complete.ess$log.income <- log(complete.ess$income)


short.ess <- complete.ess


# ###################################################################
# # Carry out multiple imputation of predictors
# ###################################################################
short.ess$country <- substr (short.ess$cntry.yr, start=1, stop=3)
short.ess$year    <- substr (short.ess$cntry.yr, start=4, stop=7)

# Create smaller set for imputation
covariates <- c("brwmny","log.income",
                "male","agea","unemplindiv",
                "our", "income_cat","eduyrs2",
                "mbtru2","rlgdgr","country",
                "year","gincdif2", "cntry.yr",
                "hhdebt2gdp")
smallData <- short.ess[,is.element(colnames(short.ess), covariates)]

a.out <- amelia(smallData
                , m = 5
                , id = c("country",
                         "year",
                         "gincdif2",
                         "cntry.yr",
                         "hhdebt2gdp")
                , logs = "our"
                , noms = c("male","unemplindiv")
                , ords = c("brwmny", "income_cat")
                , p2s = 1
                , parallel = "multicore")

summary (a.out$imputations[[1]])

a.out$imputations[[1]]$our <- ifelse (a.out$imputations[[1]]$our > 42, 42
                                      , a.out$imputations[[1]]$our)
a.out$imputations[[2]]$our <- ifelse (a.out$imputations[[2]]$our > 42, 42
                                      , a.out$imputations[[2]]$our)
a.out$imputations[[3]]$our <- ifelse (a.out$imputations[[3]]$our > 42, 42
                                      , a.out$imputations[[3]]$our)
a.out$imputations[[4]]$our <- ifelse (a.out$imputations[[4]]$our > 42, 42
                                      , a.out$imputations[[4]]$our)
a.out$imputations[[5]]$our <- ifelse (a.out$imputations[[5]]$our > 42, 42
                                      , a.out$imputations[[5]]$our)

a.out.list <- list(a.out$imputations[[1]],
                   a.out$imputations[[2]],
                   a.out$imputations[[3]],
                   a.out$imputations[[4]],
                   a.out$imputations[[5]])

which.omitted <- as.numeric (attr (na.omit(a.out$imputations[[1]]), "na.action"))
save (a.out.list, file = "observational_ess/data/imputedDatasetsCreditPreferences.RData")
rm (a.out)

###################################################################
# Carry out completely-pooled fixed effects model with imputations
###################################################################

# Table 1: ====================================================================

mydata <- imputationList(a.out.list)

mod1.mi <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny + log.income + our +
         unemplindiv + agea + eduyrs2 + male, data=x, index="cntry.yr")})

mod2.mi <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny*log.income + our +
         unemplindiv + agea + eduyrs2 + male, data=x, index="cntry.yr")})

mod3.mi <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny*our + log.income +
         unemplindiv + agea + eduyrs2 + male, data=x, index="cntry.yr")})


# Table 1, Model 1
betas.mod1 <- MIextract(mod1.mi, fun = coef)
vars.mod1 <- MIextract(mod1.mi, fun = vcov)
tab1m1 <- round(summary(MIcombine(betas.mod1, vars.mod1))[,c(1:2)], 3) %>%
  rename(coef.m1 = results,
         se.m1 = se)

# Table 1, Model 2
betas.mod2 <- MIextract(mod2.mi, fun = coef)
vars.mod2 <- MIextract(mod2.mi, fun = vcov)
tab1m2 <- round(summary(MIcombine(betas.mod2, vars.mod2))[,c(1:2)], 3) %>%
  rename(coef.m2 = results,
         se.m2 = se)

# Table 1, Model 3
betas.mod3 <- MIextract(mod3.mi, fun = coef)
vars.mod3 <- MIextract(mod3.mi, fun = vcov)
tab1m3 <- round(summary(MIcombine(betas.mod3, vars.mod3))[,c(1:2)], 3) %>%
  rename(coef.m3 = results,
         se.m3 = se)

# combine all
merge(tab1m1, tab1m2, by = 0, all = T) %>%
  merge(., tab1m3 %>% mutate(Row.names = rownames(.)), by = "Row.names", all = T) %>%
  mutate(Row.names = c("Age", 
                       "Credit access",
                       "Credit access X Income", 
                       "Credit X OUR", 
                       "Education",
                       "Income (log)",
                       "Male",
                       "Occupational unemployment risk (OUR)",
                       "Unemployed"),
         Row.names = factor(Row.names, levels = c("Credit access",
                                                  "Income (log)",
                                                  "Credit access X Income", 
                                                  "Occupational unemployment risk (OUR)",
                                                  "Credit X OUR",
                                                  "Unemployed",
                                                  "Age", 
                                                  "Education",
                                                  "Male"))) %>%
  arrange(Row.names)
  
# observations
n.tab1 <- cbind(mod1 = c(as.data.frame(MIextract(mod1.mi, fun = nobs)[1]),
                  lapply(mydata$imputations, function(x) length(unique(x$cntry.yr)))[1]),
                mod2 = c(as.data.frame(MIextract(mod2.mi, fun = nobs)[1]),
                  lapply(mydata$imputations, function(x) length(unique(x$cntry.yr)))[1]),
                mod3 = c(as.data.frame(MIextract(mod3.mi, fun = nobs)[1]),
                  lapply(mydata$imputations, function(x) length(unique(x$cntry.yr)))[1])
                  )
rownames(n.tab1) <- c("N", "N surveys")
print(n.tab1)


# Figure 1 ====================================================================

graphPath <- "/graphPlots/"
income <- seq (min (short.ess$log.income), max (short.ess$log.income), length=50) # min to maximum of objects2save$data$W[,6]
our <- seq (min (short.ess$our, na.rm=T), max (short.ess$our, na.rm=T), length=50) # slightly less than min to maximum
credit.access <- sort (unique (short.ess$brwmny))

credit.coef.mod.2 <- rbind (
  rmvnorm(100, betas.mod2[[1]][c(1,8)], vars.mod2[[1]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod2[[2]][c(1,8)], vars.mod2[[2]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod2[[3]][c(1,8)], vars.mod2[[3]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod2[[4]][c(1,8)], vars.mod2[[4]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod2[[5]][c(1,8)], vars.mod2[[5]][c(1,8),c(1,8)])
)
credit.coef.mod.3 <- rbind (
  rmvnorm(100, betas.mod3[[1]][c(1,8)], vars.mod3[[1]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod3[[2]][c(1,8)], vars.mod3[[2]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod3[[3]][c(1,8)], vars.mod3[[3]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod3[[4]][c(1,8)], vars.mod3[[4]][c(1,8),c(1,8)])
  , rmvnorm(100, betas.mod3[[5]][c(1,8)], vars.mod3[[5]][c(1,8),c(1,8)])
)
income.coef.mod.2 <- rbind (
  rmvnorm(100, betas.mod2[[1]][c(3,8)], vars.mod2[[1]][c(3,8),c(3,8)])
  , rmvnorm(100, betas.mod2[[2]][c(3,8)], vars.mod2[[2]][c(3,8),c(3,8)])
  , rmvnorm(100, betas.mod2[[3]][c(3,8)], vars.mod2[[3]][c(3,8),c(3,8)])
  , rmvnorm(100, betas.mod2[[4]][c(3,8)], vars.mod2[[4]][c(3,8),c(3,8)])
  , rmvnorm(100, betas.mod2[[5]][c(3,8)], vars.mod2[[5]][c(3,8),c(3,8)])
)
our.coef.mod.3 <- rbind (
  rmvnorm(100, betas.mod3[[1]][c(2,8)], vars.mod3[[1]][c(2,8),c(2,8)])
  , rmvnorm(100, betas.mod3[[2]][c(2,8)], vars.mod3[[2]][c(2,8),c(2,8)])
  , rmvnorm(100, betas.mod3[[3]][c(2,8)], vars.mod3[[3]][c(2,8),c(2,8)])
  , rmvnorm(100, betas.mod3[[4]][c(2,8)], vars.mod3[[4]][c(2,8),c(2,8)])
  , rmvnorm(100, betas.mod3[[5]][c(2,8)], vars.mod3[[5]][c(2,8),c(2,8)])
)

income.credit.effect <- matrix (NA, nrow=5, ncol=5)
for (i in 1:5){
  effect <- income.coef.mod.2 %*% c(1, credit.access[i])
  income.credit.effect[,i] <- quantile (effect, prob=c(0.025,0.05,0.5,0.95,0.975))
}

our.credit.effect <- matrix (NA, nrow=5, ncol=5)
for (i in 1:5){
  effect <- our.coef.mod.3 %*% c(1, credit.access[i])
  our.credit.effect[,i] <- quantile (effect, prob=c(0.025,0.05,0.5,0.95,0.975))
}

credit.income.effect <- matrix (NA, nrow=5, ncol=50)
for (i in 1:50){
  effect <- credit.coef.mod.2 %*% c(1, income[i])
  credit.income.effect[,i] <- quantile (effect, prob=c(0.025,0.05,0.5,0.95,0.975))
}

credit.our.effect <- matrix (NA, nrow=5, ncol=50)
for (i in 1:50){
  effect <- credit.coef.mod.3 %*% c(1, our[i])
  credit.our.effect[,i] <- quantile (effect, prob=c(0.025,0.05,0.5,0.95,0.975))
}

hist.Income <- hist (short.ess$log.income, plot=F)
hist.Income$counts = hist.Income$counts/sum(hist.Income$counts)
hist.our <- hist (short.ess$our, plot=F)
hist.our$counts = hist.our$counts/sum(hist.our$counts)
hist.Brwmny <- hist (short.ess$brwmny, plot=F)
hist.Brwmny$counts = hist.Brwmny$counts/sum(hist.Brwmny$counts)

### left-hand figure
par (mar=c(3,3,1,3), las=0)
plot(hist.Income, 
     axes = FALSE, col="gray", border="gray",
     main ="", xlab = "", ylab = "", ylim=c(0,0.25))
axis(side = 4)  
mtext("Relative frequency of income", side = 4, line = 2)
par(new = TRUE)
plot (c(min(income), max(income)), c(min(credit.income.effect)-0.1, max(credit.income.effect)+0.1)
      ,  type="n"
      , main=""
      , xlab=""
      , ylab=""
      , ylim=c(-0.25,0.1)
      , bty="n")
mtext(side=1, line=2, text="Income (log)")
mtext(side=2, line=2, text="Effect of credit access conditional on income")
points (xy.coords(income, credit.income.effect[3,]), pch=19)
segments (x0=income, x1=income, y0=credit.income.effect[1,], y1=credit.income.effect[5,])
abline (h=0, lty=3)


### right-hand figure
par (mar=c(3,3,1,3), las=0)
plot(hist.our, 
     axes = FALSE, col="gray", border="gray",
     main ="", xlab = "", ylab = "")
axis(side = 4)   
mtext("Relative frequency of occupational unemployment risk", side = 4, line = 2)
par(new = TRUE)                         
plot (c(min(our), max(our)), c(min(credit.our.effect)-0.01, max(credit.our.effect)+0.01)
      ,  type="n"
      , main=""
      , xlab=""
      , ylab=""
      , ylim=c(-0.25,0.1)
      , bty="n"
      , axes=T)
mtext(side=1, line=2, text="Occupational unemployment risk")
mtext(side=2, line=2, text="Effect of credit access conditional on occupational unemployment risk")
points (xy.coords(our, credit.our.effect[3,]), pch=19)
segments (x0=our, x1=our
          , y0=credit.our.effect[1,], y1=credit.our.effect[5,])
abline (h=0, lty=3)

# Figure B.1: correlation between bank credit and credit access ===========================

short.ess %>%
  group_by(country, year, hhdebt2gdp) %>%
  summarize(brwmny = mean(brwmny, na.rm=T)) %>%
  ggplot(., aes(x = hhdebt2gdp, y = brwmny)) +
  geom_smooth(method=lm , color="black", fill="grey", se=TRUE) + 
  geom_point(shape = 3) +
  xlab("Bank Credit to GDP (%)") +
  ylab("Mean Reported Access to Credit") + 
  ylim(1,5) +
  theme_bw() -> gg_fig_b1
print(gg_fig_b1)

# Table B.1: percentage of Respondents with Easy Credit Access ===========================

short.ess %>%
  mutate(brwmny.cat = ifelse(brwmny > 3, 1, 0)) %>%
  group_by(year) %>%
  summarize(perc_goodaccess = mean(brwmny.cat, na.rm = T) * 100)

# Figure B.2: self-reported credit access by country-year ================================

short.ess %>%
  mutate(brwmny.cat = ifelse(brwmny > 3, 1, 0)) %>%
  group_by(cntry.yr) %>%
  summarize(access = mean(brwmny.cat, na.rm = T)) %>%
  mutate(cntry = substr(cntry.yr, 1,3),
         year = as.numeric(substr(cntry.yr, 4,7))) %>%
  dplyr::select(-cntry.yr) %>%
  as.data.frame(.) %>%
  reshape(., idvar="cntry",timevar="year",direction="wide") -> crdt.CntryYr

# plot
# pdf(file = "graphPlots/crdtAcs_cntryYr.pdf",
#     width = 7,
#     height = 5)
barplot(as.matrix(t(crdt.CntryYr[, c(2:ncol(crdt.CntryYr))])), beside = T, col=grey.colors(5),
        ylab="Ability to Borrow", names.arg = crdt.CntryYr$cntry, ylim=c(0,1), cex.names = 0.7,
        cex.axis = 0.7,
        las = 2)
box(bty="l")
legend("topleft", legend = c("2002", "2004", "2006", "2008", "2010"), fill = grey.colors(5)
       , cex = .7, bty = "n")
# dev.off()

# Table B.2: explaining access to credit =========================================

tabb2m1.mi <- lapply(mydata$imputations, function(x){
  plm (brwmny ~ log.income + our +
         unemplindiv + agea + eduyrs2 + male, data=x, index="cntry.yr")})

betas.tabb2m1 <- MIextract(tabb2m1.mi, fun = coef)
vars.tabb2m1 <- MIextract(tabb2m1.mi, fun = vcov)
round(summary(MIcombine(betas.tabb2m1, vars.tabb2m1))[,c(1:2)], 3) %>%
  rename(coef.m1 = results,
         se.m1 = se) %>%
  mutate(vars = c("Income (log)",
                  "Occupational risk",
                  "Unemployed",
                  "Age",
                  "Education (years)",
                  "Male")) %>%
  dplyr::select(vars, coef.m1, se.m1)
           

# Table B.3: financialization measure =========================================

### create variables for analysis at aggregated level
mydata_agg <- lapply(mydata[[1]], function(x){
  x <- x %>%
    group_by(cntry.yr, country, year, hhdebt2gdp) %>%
    summarize(
      redist_agg = mean(gincdif2, na.rm = T)
      , age_agg = mean(agea, na.rm = T)
      , brwmny_agg = mean(brwmny, na.rm = T)
      , lnincome_agg = mean(log.income, na.rm = T)
      , male_agg = mean(male, na.rm = T)
      , male_agg = mean(male, na.rm = T)
      , our_agg = mean(our, na.rm = T)
      , unemplindiv_agg = mean(unemplindiv, na.rm = T)
      , eduyrs_agg = mean(eduyrs2, na.rm = T)
    )
})

### run models at survey-level w/out any FE

## model 1
m1_agg <- lapply(mydata_agg, function(x){
  lm(redist_agg ~ brwmny_agg + lnincome_agg + our_agg +
       unemplindiv_agg + age_agg + eduyrs_agg + male_agg
     , data=x)
})

m1_agg_coefs <- MIextract(m1_agg, fun = coef)
m1_agg_vars <- MIextract(m1_agg, fun = vcov)
tabb3m1 <- round(summary(MIcombine(m1_agg_coefs, m1_agg_vars))[,c(1:2)], 3) %>%
  rename(coef.m1 = results,
         se.m1 = se)

## model 2
m2_agg <- lapply(mydata_agg, function(x){
  lm(redist_agg ~ hhdebt2gdp + lnincome_agg + our_agg +
       unemplindiv_agg + age_agg + eduyrs_agg + male_agg
     , data=x)
})

m2_agg_coefs <- MIextract(m2_agg, fun = coef)
m2_agg_vars <- MIextract(m2_agg, fun = vcov)
tabb3m2 <- round(summary(MIcombine(m2_agg_coefs, m2_agg_vars))[,c(1:2)], 3) %>%
  rename(coef.m2 = results,
         se.m2 = se)


## model 3
m3_agg <- lapply(mydata_agg, function(x){
  lm(redist_agg ~ brwmny_agg + hhdebt2gdp + lnincome_agg + our_agg +
       unemplindiv_agg + age_agg + eduyrs_agg + male_agg
     , data=x)
})

m3_agg_coefs <- MIextract(m3_agg, fun = coef)
m3_agg_vars <- MIextract(m3_agg, fun = vcov)
tabb3m3 <- round(summary(MIcombine(m3_agg_coefs, m3_agg_vars))[,c(1:2)], 3) %>%
  rename(coef.m3 = results,
         se.m3 = se)

## model 4
m1_agg_fe <- lapply(mydata_agg, function(x){
  plm(redist_agg ~ brwmny_agg + lnincome_agg + our_agg +
        unemplindiv_agg + age_agg + eduyrs_agg + male_agg
      , data=x, index="country")
})

m1_agg_fe_coefs <- MIextract(m1_agg_fe, fun = coef)
m1_agg_fe_vars <- MIextract(m1_agg_fe, fun = vcov)
tabb3m4 <- round(summary(MIcombine(m1_agg_fe_coefs, m1_agg_fe_vars))[,c(1:2)], 3) %>%
  rename(coef.m4 = results,
         se.m4 = se)


## model 5
m2_agg_fe <- lapply(mydata_agg, function(x){
  plm(redist_agg ~ hhdebt2gdp + lnincome_agg + our_agg +
        unemplindiv_agg + age_agg + eduyrs_agg + male_agg
      , data=x, index="country")
})

m2_agg_fe_coefs <- MIextract(m2_agg_fe, fun = coef)
m2_agg_fe_vars <- MIextract(m2_agg_fe, fun = vcov)
tabb3m5 <- round(summary(MIcombine(m2_agg_fe_coefs, m2_agg_fe_vars))[,c(1:2)], 3) %>%
  rename(coef.m5 = results,
         se.m5 = se)

## model 6
m3_agg_fe <- lapply(mydata_agg, function(x){
  plm(redist_agg ~ brwmny_agg + hhdebt2gdp + lnincome_agg + our_agg +
        unemplindiv_agg + age_agg + eduyrs_agg + male_agg
      , data=x, index="country")
})

m3_agg_fe_coefs <- MIextract(m3_agg_fe, fun = coef)
m3_agg_fe_vars <- MIextract(m3_agg_fe, fun = vcov)
tabb3m6 <- round(summary(MIcombine(m3_agg_fe_coefs, m3_agg_fe_vars))[,c(1:2)], 3) %>%
  rename(coef.m6 = results,
         se.m6 = se)

# combine all
merge(tabb3m1, tabb3m2, by = 0, all = T) %>%
  merge(., tabb3m3 %>% mutate(Row.names = rownames(.)), by = "Row.names", all = T) %>%
  merge(., tabb3m4 %>% mutate(Row.names = rownames(.)), by = "Row.names", all = T) %>%
  merge(., tabb3m5 %>% mutate(Row.names = rownames(.)), by = "Row.names", all = T) %>%
  merge(., tabb3m6 %>% mutate(Row.names = rownames(.)), by = "Row.names", all = T) %>%
  filter(Row.names != "(Intercept)") %>%
  mutate(Row.names = c("Age", 
                       "Credit access",
                       "Education",
                       "Household debt-to-GDP",
                       "Income (log)",
                       "Male",
                       "Occupational unemployment risk (OUR)",
                       "Unemployed"),
         Row.names = factor(Row.names, levels = c("Credit access",
                                                  "Household debt-to-GDP",
                                                  "Income (log)",
                                                  "Occupational unemployment risk (OUR)",
                                                  "Unemployed",
                                                  "Age", 
                                                  "Education",
                                                  "Male"))) %>%
  arrange(Row.names)

# nobs
n.tabb3 <- cbind(mod1 = c(as.data.frame(MIextract(m1_agg, fun = nobs)[1])),
                mod2 = c(as.data.frame(MIextract(m2_agg, fun = nobs)[1])),
                mod3 = c(as.data.frame(MIextract(m3_agg, fun = nobs)[1])),
                mod4 = c(as.data.frame(MIextract(m1_agg_fe, fun = nobs)[1])),
                mod5 = c(as.data.frame(MIextract(m2_agg_fe, fun = nobs)[1])),
                mod6 = c(as.data.frame(MIextract(m3_agg_fe, fun = nobs)[1]))
)
rownames(n.tabb3) <- c("N")
print(n.tabb3)

# Figure C.1: re-run with income as quintile ===================================

mod.mi.income_cat <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny*as.factor(income_cat) + our +
         unemplindiv + agea + eduyrs2 + male, data=x, index= 'cntry.yr')
}
)

betas.mod.income_cat <- MIextract(mod.mi.income_cat, fun = coef)
vars.mod.income_cat <- MIextract(mod.mi.income_cat, fun = vcov)
round(summary(MIcombine(betas.mod.income_cat, vars.mod.income_cat))[,-5], 3)

### create result table for plotting
exp_plot <- summary(MIcombine(betas.mod.income_cat
                              , vars.mod.income_cat))[c(1, 10:12),c(1:2)] %>%
  rownames_to_column() %>%
  rename(var = rowname) %>%
  mutate(var = case_match(var
                      , 'brwmny' ~ '1st Quartile'
                      , 'brwmny:as.factor(income_cat)2' ~ '2nd Quartile'
                      , 'brwmny:as.factor(income_cat)3' ~ '3rd Quartile'
                      , 'brwmny:as.factor(income_cat)4' ~ '4th Quartile')
         , results = case_when(var != '1st Quartile' ~ results[var == '1st Quartile'] + results
                               , TRUE ~ results))

## combine MI estimates for single covariance matrix
### source function
source('observational_ess/function_combine_mi_estimates.R')

### create single matrix
mod.mi.income_cat_cvar <- comb.mi(mod.mi.income_cat)$comb.cov

### adjust SEs for plotting
exp_plot <- exp_plot %>%
  mutate(se = case_when(var == '2nd Quartile' ~ sqrt(mod.mi.income_cat_cvar[1,1] 
                                                     + mod.mi.income_cat_cvar[10 ,10] 
                                                     + 2 * mod.mi.income_cat_cvar[1,10])
                        , var == '3rd Quartile' ~ sqrt(mod.mi.income_cat_cvar[1,1] 
                                                       + mod.mi.income_cat_cvar[11 ,11] 
                                                       + 2 * mod.mi.income_cat_cvar[1,11])
                        , var == '4th Quartile' ~ sqrt(mod.mi.income_cat_cvar[1,1] 
                                                       + mod.mi.income_cat_cvar[12 ,12] 
                                                       + 2 * mod.mi.income_cat_cvar[1,12])
                        , TRUE ~ se))

ggplot(exp_plot, aes(x = var, y = results)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_point(aes(x = var, 
                 y = results)) + 
  geom_linerange(aes(x = var, 
                     ymin = results - 1.645 * se,
                     ymax = results + 1.645 * se),
                 lwd = 1) +
  geom_linerange(aes(x = var, 
                     ymin = results - 1.96 * se,
                     ymax = results + 1.96 * se),
                 lwd = 1/2) +
  xlab('Income quartiles (by country-year)') +
  ylab('Effect of credit access by income quartiles') +
  theme_bw() -> plot_income_cat
plot_income_cat

# Table C.1: country instead of country-year FE ===============================

### Model 1
mod1.mi.cntry <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny + log.income + our +
         unemplindiv + agea + eduyrs2 + male, data=x, index="country")})

betas.mod1.cntry <- MIextract(mod1.mi.cntry, fun = coef)
vars.mod1.cntry <- MIextract(mod1.mi.cntry, fun = vcov)
tabc1m1 <- round(summary(MIcombine(betas.mod1.cntry, vars.mod1.cntry))[,c(1:2)], 3) %>%
  rename(coef.m1 = results,
         se.m1 = se)

### Model 2
mod2.mi.cntry <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny*log.income + our +
         unemplindiv + agea + eduyrs2 + male, data=x, index="country")})

betas.mod2.cntry <- MIextract(mod2.mi.cntry, fun = coef)
vars.mod2.cntry <- MIextract(mod2.mi.cntry, fun = vcov)
tabc1m2 <- round(summary(MIcombine(betas.mod2.cntry, vars.mod2.cntry))[,c(1:2)], 3) %>%
  rename(coef.m2 = results,
         se.m2 = se)

### Model 3
mod3.mi.cntry <- lapply(mydata$imputations, function(x){
  plm (gincdif2 ~ brwmny*our + log.income +
         unemplindiv + agea + eduyrs2 + male, data=x, index="country")})

betas.mod3.cntry <- MIextract(mod3.mi.cntry, fun = coef)
vars.mod3.cntry <- MIextract(mod3.mi.cntry, fun = vcov)
tabc1m3 <- round(summary(MIcombine(betas.mod3.cntry, vars.mod3.cntry))[,c(1:2)], 3) %>%
  rename(coef.m3 = results,
         se.m3 = se)

# combine all
merge(tabc1m1, tabc1m2, by = 0, all = T) %>%
  merge(., tabc1m3 %>% mutate(Row.names = rownames(.)), by = "Row.names", all = T) %>%
  mutate(Row.names = c("Age", 
                       "Credit access",
                       "Credit X Income",
                       "Credit X OUR",
                       "Education",
                       "Income (log)",
                       "Male",
                       "Occupational unemployment risk (OUR)",
                       "Unemployed"),
         Row.names = factor(Row.names, levels = c("Credit access",
                                                  "Income (log)",
                                                  "Credit X Income",
                                                  "Occupational unemployment risk (OUR)",
                                                  "Credit X OUR",
                                                  "Unemployed",
                                                  "Age", 
                                                  "Education",
                                                  "Male"))) %>%
  arrange(Row.names)

# nobs
n.tabc1 <- cbind(mod1 = c(as.data.frame(MIextract(mod1.mi.cntry, fun = nobs)[1]),
                          lapply(mydata$imputations, function(x) length(unique(x$cntry.yr)))[1]),
                 mod2 = c(as.data.frame(MIextract(mod2.mi.cntry, fun = nobs)[1]),
                          lapply(mydata$imputations, function(x) length(unique(x$cntry.yr)))[1]),
                 mod3 = c(as.data.frame(MIextract(mod3.mi.cntry, fun = nobs)[1]),
                          lapply(mydata$imputations, function(x) length(unique(x$cntry.yr)))[1])
)
rownames(n.tabc1) <- c("N", "N surveys")
print(n.tabc1)
